##### Packages

library(xtable)
library(glmmTMB)
library(MASS)
library(openxlsx)
library(tidyverse)
library(sjPlot)
library(sandwich)
library(lmtest)
library(texreg)



##### Data

findat <- read.csv(".../Main_data.csv")

fedat <- read.csv(".../FE_data.csv") 

fedat2 <- read.csv(".../FE2_data.csv") 

distr <-  read.xlsx(".../Districts_Time_Series.xlsx")

countrydis <- read.xlsx(".../Districts.xlsx")

cros <- read.xlsx(".../Cross_Sec.xlsx")

##### Figures

### Figure 1: Evolution effective electoral thresholds 

PRint <- distr %>% group_by(Country) %>% summarize(PR = mean(PR))

ggplot(distr, aes(Year,eff_thres))+ geom_line()+facet_wrap(.~Country)+ theme_bw()+ 
  geom_vline(data = PRint, aes(xintercept=PR), linetype = "dashed", colour = "red")+
  theme(axis.line.x = element_line(colour = "black"), axis.line.y = element_line(colour = "black"),strip.background =element_rect(fill="white"),
        panel.border = element_blank())+labs(title = "", x = "Year",
                                             y = "Effective Electoral Threshold")

### Figure A1: Bivariate density plot

ggplot(findat, aes(x=vot_sh, y=spmax) ) +
  geom_hex(bins = 40) +
  scale_fill_continuous(type = "viridis") +
  theme_bw()+labs(title = "", 
                  x = "Vote Share Pre-Reform Districts",
                  y = "Max. Vote Share Neigh. Districts")

##### Descriptives

### Table 1: Distribution of PR district magnitude

countrydis2 <- countrydis %>% group_by(crty) %>% summarize(N = n(), Mean = mean(mag), Median = median(mag),
                                                  Standard = sd(mag), Minimum = min(mag), Maximum = max(mag), stdme = sd(mag)/mean(mag)) %>%
  filter(!(crty %in% c("Denmark (1918)","Germany (1918)")))

colnames(countrydis2) <- c("Country","N","Mean","Median","Std. Dev.","Min.","Max.","Coef. of Var.")


tabx <- xtable(countrydis2, caption = "Distribution of PR District Magnitude by Country", label = "district", 
               align = c("c","c","c","c","c","c","c","c","c"))
print(tabx, booktabs = TRUE,  include.rownames = F, hline.after = c(-1, 0, nrow(tabx)))


### Table 3: Summary Statistics

sumdat <- findat %>% dplyr::select(vot_sh,spmax,mrdis,pr_dis_mag, soc) %>% rename (votsh = vot_sh, prdis = pr_dis_mag) %>%
  summarise_each(list(min = min, 
                      median = median, 
                      max = max,
                      mean = mean, 
                      sd = sd)) 

sumdat <- sumdat %>% gather(stat, val) %>%
  separate(stat, into = c("var", "stat"), sep = "_") %>%
  spread(stat, val) %>%
  dplyr::select(var, mean, median, sd, min, max)

colnames(sumdat) <- c("Variable","Mean","Median","Standard Deviation","Minimum","Maximum")

sumdat$Variable <- c("Pre-Reform District Magnitude","Post-Reform District Magnitude","Socialist Party","Max. Vote Share Contiguous Districts","Vote Share")

tabx <- xtable(sumdat, caption = "Summary Statistics", label = "sumstat", 
               align = c("c","c","c","c","c","c","c"))
print(tabx, booktabs = TRUE,  include.rownames = F, hline.after = c(-1, 0, nrow(tabx)),
      scalebox = 0.8)

### Table 4: Distribution of PR district magnitude (Several reforms Denmark/Germany/Norway)

# Norway I (1919, 126 SMDs) was added by hand in Latex

countrydis2 <- countrydis %>% group_by(crty) %>% summarize(N = n(), Mean = mean(mag), Median = median(mag),
                                                          Standard = sd(mag), Minimum = min(mag), Maximum = max(mag), stdme = sd(mag)/mean(mag)) %>%
  filter(crty %in% c("Denmark (1915)","Denmark (1918)","Germany (1917)","Germany (1918)","Norway (1918)"))

colnames(countrydis2) <- c("Country","N","Mean","Median","Std. Dev.","Min.","Max.","Coef. of Var.")


tabx <- xtable(countrydis2, caption = "Distribution of PR District Magnitude by Country", label = "district", 
               align = c("c","c","c","c","c","c","c","c","c"))
print(tabx, booktabs = TRUE,  include.rownames = F, hline.after = c(-1, 0, nrow(tabx)))


### Table A1: Summary statistics by country (Appendix) 

land <-  unique(findat$crty)

crtyy <- list()

for (i in land){
  
  sumdat <- findat %>% filter(crty == i) %>% 
    dplyr::select(vot_sh,spmax,mrdis,pr_dis_mag, soc) %>% rename (votsh = vot_sh, prdis = pr_dis_mag) %>%
    summarize(across(votsh:soc, list(mean = mean, median = median, sd = sd, min = min, max = max)))%>% 
    gather(stat, val)  %>%
    separate(stat,into = c("var", "stat"), sep = "_") %>% 
    spread(stat, val) %>%
    dplyr::select(var, mean, median, sd, min, max)
  
  crtyy[[i]] <- sumdat  
}

sumdat <- bind_rows(crtyy) 

land

land <- c("Belgium","Switzerland","Denmark","France","Germany","Norway") 

colnames(sumdat) <- c("Variable","Mean","Median","Standard Deviation","Minimum","Maximum")

sumdat$Variable <- rep(c("Pre-Reform District Magnitude","Post-Reform District Magnitude","Socialist Party","Max. Vote Share Contiguous Districts","Vote Share"),length(land))

tabx <- xtable(sumdat, caption = "Summary Statistics by Country", label = "sumstat_crty", 
               align = c("c","c","c","c","c","c","c"))

addtorow <- list()
addtorow$pos <- as.list(c(
  0, 5,10,15,20,25    #position for second command (longtable argument)
))
addtorow$command <- as.vector(c(paste0(paste0('& \\multicolumn{5}{l}{\\textbf{', land[1], '}}', collapse=''), '\\\\'),
                                paste0(paste0('& \\multicolumn{5}{c}{\\textbf{', land[2], '}}', collapse=''), '\\\\'),
                                paste0(paste0('& \\multicolumn{5}{c}{\\textbf{', land[3], '}}', collapse=''), '\\\\'),
                                paste0(paste0('& \\multicolumn{5}{c}{\\textbf{', land[4], '}}', collapse=''), '\\\\'),
                                paste0(paste0('& \\multicolumn{5}{c}{\\textbf{', land[5], '}}', collapse=''), '\\\\'),
                                paste0(paste0('& \\multicolumn{5}{c}{\\textbf{', land[6], '}}', collapse=''), '\\\\')))

print(tabx, booktabs = TRUE,  include.rownames = F, hline.after = c(-1,0,  nrow(tabx)), add.to.row=addtorow,
      scalebox = 0.8)


##### Estimation 

### Table 5: Main results

fulmod <- as.formula(pr_dis_mag~vot_sh*spmax+mrdis+soc+crty)

fulmod2 <- as.formula(pr_dis_mag~vot_sh*spmax+mrdis+soc+crty+Party)

fulmod3 <- as.formula(pr_dis_mag~vot_sh*spmax+mrdis+soc+(1|crty)+(1|Party))

fulmod4 <- as.formula(log(pr_dis_mag)~vot_sh*spmax+mrdis+soc+crty)

fulmod5 <- as.formula(log(pr_dis_mag)~vot_sh*spmax+mrdis+soc+crty+Party)

fulmod6 <- as.formula(log(pr_dis_mag)~vot_sh*spmax+mrdis+soc+(1|crty)+(1|Party))

m1 <- glm.nb(fulmod, findat, weights = basew,
             control = glm.control(maxit=10000))
ct1 <- coeftest(m1, vcovCL(m1, cluster = ~ crty))
m1se <- ct1[, 2]
m1pval <- ct1[, 4]

m2 <- glm.nb(fulmod2, findat, weights = basew,
             control = glm.control(maxit=10000))
ct2 <- coeftest(m2, vcovCL(m2, cluster = ~ crty))
m2se <- ct2[, 2]
m2pval <- ct2[, 4]

m3 <- glmmTMB(fulmod3, findat, weights = basew, family = nbinom2)

m4 <- lm(fulmod4, findat, weights = basew)
ct4 <- coeftest(m4, vcovCL(m4, cluster = ~ crty))
m4se <- ct4[, 2]
m4pval <- ct4[, 4]

m5 <- lm(fulmod5, findat, weights = basew)
ct5 <- coeftest(m5, vcovCL(m5, cluster = ~ crty))
m5se <- ct5[, 2]
m5pval <- ct5[, 4]

m6 <- glmmTMB(fulmod6, findat, weights = basew, family = gaussian())

texreg(list(m1,m2,m3,m4,m5,m6), override.se = list(m1se,m2se,NA,m4se,m5se,NA), 
       override.pvalues = list(m1pval,m2pval,NA,m4pval,m5pval,NA), sideways = T,
       custom.coef.map = list("vot_sh" = "Vote Share","spmax" = "Max. Vote Share Neigh. Districts",
                              "vot_sh:spmax" = "Vote Share X Max. Vote Share Neigh. Districts", 
                              "mrdis" = "Pre-Reform District Magnitude", "soc" = "Socialist Party",
                              "(Intercept)" = "Intercept"), omit.coef = "(crty)",
       caption = "Electoral Geography \\& PR District Magnitude", label = "est1", #sideways = T,
       booktabs = T, dcolumn = T, use.packages = F, 
       no.margin = T,custom.header = list("Neg. Binomial" = 1:3, "OLS" = 4:6),
       custom.gof.rows = list(
         "Country FE" = c(rep("Yes",2),"No",rep("Yes",2),"No"),
         "Party FE" = c(rep(c("No","Yes","No"),2)),
         "Country RE" = c(rep("No",2),"Yes",rep("No",2),"Yes"),
         "Party RE" = c(rep(c("No","No","Yes"),2)),
         "Cluster-Robust Std. Err. by Country" = c(rep("Yes",2),"No",rep("Yes",2),"No")))

### Figure 3: Predicted district magnitude  

predat <- plot_model(m1, type = "eff", terms = c("vot_sh [0.3, .9]", "spmax [0.3, .9]"), vcov.fun = "vcovCL", 
                     vcov.args = list(cluster = ~ crty))$data


coupan <- predat %>% filter( (x == 0.3 & group == "0.3") |  (x == 0.9 & group == "0.9")) %>%
  mutate(scen = ifelse(x > 0.2 & x<0.4 & group == "0.3", "Dispersed", "Concentrated")) 

ggplot(coupan, aes(scen, predicted)) + 
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.05,position=position_dodge(1)) + theme_bw() + 
  geom_point(position=position_dodge(1)) +labs(title = "", 
                                               x = "Electoral Geography",
                                               y = "Predicted District Magnitude")+
  theme(
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),axis.text = element_text(size = 14)
  )




### Table 6: Main results

fulmod <- as.formula(log(pr_dis_mag)~vot_sh*spmax+soc+factor(districtid))

m7 <- glm.nb(pr_dis_mag~vot_sh*spmax+soc+factor(districtid), fedat, weights = basew,
             control = glm.control(maxit=10000))
ct7 <- coeftest(m7, vcovCL(m7, cluster = ~ districtid))
m7se <- ct7[, 2]
m7pval <- ct7[, 4]

m8 <- lm(fulmod, fedat, weights = basew)
ct8 <- coeftest(m8, vcovCL(m8, cluster = ~ districtid))
m8se <- ct8[, 2]
m8pval <- ct8[, 4]


texreg(list(m7,m8), override.se = list(m7se,m8se), override.pvalues = list(m7pval,m8pval), 
       custom.coef.map = list("vot_sh" = "Vote Share","spmax" = "Max. Vote Share Neigh. Districts",
                              "vot_sh:spmax" = "Vote Share X Max. Vote Share Neigh. Districts", 
                              "soc" = "Socialist Party",
                              "(Intercept)" = "Intercept"),
       custom.note = "Cluster Robust Standard Errors by Electoral District. *** p < 0.001; ** p < 0.01; * p < 0.05.",
       omit.coef = "(districtid)",
       caption = "Electoral Geography \\& PR District Magnitude: District FE", label = "est2", 
       booktabs = T, dcolumn = T, use.packages = F,
       no.margin = T,custom.header = list("Neg. Binomial" = 1,"OLS" = 2),
       custom.gof.rows = list("District FE" = c(rep("Yes",2))))

### Table 7: Main results

cros <- cros %>% arrange(code,party) %>% group_by(code,party) %>% mutate(dis = dis*100,ldis = lag(dis), chdis = dis-ldis) %>% drop_na(ldis)

m1 <- MASS::rlm(dis~ldis+infl,cros)
ct0 <- coeftest(m1, vcovCL(m1, cluster = ~code) )
m0se <- ct0[, 2]
m0pval <- ct0[, 4]
m2 <- MASS::rlm(dis~infl+code,cros)
ct1 <- coeftest(m2, vcovCL(m2, cluster = ~code))
m1se <- ct1[, 2]
m1pval <- ct1[, 4]


texreg(list(m1,m2),  override.se = list(m0se,m1se), symbol = "\\dagger",custom.header = list("Disproportionality PR" = 1:2),
       override.pvalues = list(m0pval,m1pval),stars = c(0.001, 0.01, 0.05, 0.1),
       custom.coef.map = list("infl" = "Influence on District Design","ldis" = "Disproportionality MR","(Intercept)" = "Intercept"),
       booktabs = T, dcolumn = T, use.packages = F,
       no.margin = T, custom.model.names = c("Dispro. Lag","Country FE"),
       caption = "Influence on Design and Disproportionality under PR", label = "disest", 
       custom.note = "Country-level cluster-robust standard errors in parenthesis. %stars.")


### Not included in paper: FE estimation without Copenhagen

fulmod <- as.formula(log(pr_dis_mag)~vot_sh*spmax+soc+factor(districtid))

m1 <- MASS::glm.nb(pr_dis_mag~vot_sh*spmax+soc+factor(districtid), fedat2, weights = basew,
                   control = glm.control(maxit=10000))
ct1 <- coeftest(m1, vcovCL(m1, cluster = ~ districtid))
m1se <- ct1[, 2]
m1pval <- ct1[, 4]

m2 <- lm(fulmod, fedat2, weights = basew)
ct2 <- coeftest(m2, vcovCL(m2, cluster = ~ districtid))
m2se <- ct2[, 2]
m2pval <- ct2[, 4]

texreg(list(m1,m2), override.se = list(m1se,m2se),
          override.pvalues = list(m1pval,m2pval), omit.coef = "(districtid)")

### Table A2: Main Estimations dropping one country

fulmod <- as.formula(pr_dis_mag~vot_sh*spmax+mrdis+soc+crty+Party)

fulmodm <- as.formula(pr_dis_mag~vot_sh*spmax+mrdis+soc+(1|crty)+(1|Party))

### Belgium

datwc <- findat %>% filter( crty != "BEL")
k <- length(unique(datwc$crty))
datwc <- datwc %>% mutate(total = n(), ncrty = k) %>%
  group_by(crty) %>% mutate(count = n()) %>% ungroup() %>%
  mutate(basew = 1/(count/total)) %>% dplyr::select(-total,-ncrty,-count)

bel <- glm.nb(fulmod, datwc, weights = basew,
              control = glm.control(maxit=10000))
bel1 <- coeftest(bel, vcovCL(bel, cluster = ~ crty))
belse <- bel1[, 2]
belpval <- bel1[, 4]

belm <- glmmTMB(fulmodm, datwc, weights = basew, family = nbinom2)

### Switzerland

datwc <- findat %>% filter( crty != "CH")
k <- length(unique(datwc$crty))
datwc <- datwc %>% mutate(total = n(), ncrty = k) %>%
  group_by(crty) %>% mutate(count = n()) %>% ungroup() %>%
  mutate(basew = 1/(count/total)) %>% dplyr::select(-total,-ncrty,-count)

ch <- glm.nb(fulmod, datwc, weights = basew,
             control = glm.control(maxit=10000))
ch1 <- coeftest(ch, vcovCL(ch, cluster = ~ crty))
chse <- ch1[, 2]
chpval <- ch1[, 4]

chm <- glmmTMB(fulmodm, datwc, weights = basew, family = nbinom2)


### Denmark

datwc <- findat %>% filter( crty != "DNK")
k <- length(unique(datwc$crty))
datwc <- datwc %>% mutate(total = n(), ncrty = k) %>%
  group_by(crty) %>% mutate(count = n()) %>% ungroup() %>%
  mutate(basew = 1/(count/total)) %>% dplyr::select(-total,-ncrty,-count)

dnk <- glm.nb(fulmod, datwc, weights = basew,
              control = glm.control(maxit=10000))
dnk1 <- coeftest(dnk, vcovCL(dnk, cluster = ~ crty))
dnkse <- dnk1[, 2]
dnkpval <- dnk1[, 4]

dnkm <- glmmTMB(fulmodm, datwc, weights = basew, family = nbinom2)

### France

datwc <- findat %>% filter( crty != "FRA")
k <- length(unique(datwc$crty))
datwc <- datwc %>% mutate(total = n(), ncrty = k) %>%
  group_by(crty) %>% mutate(count = n()) %>% ungroup() %>%
  mutate(basew = 1/(count/total)) %>% dplyr::select(-total,-ncrty,-count)

fra <- glm.nb(fulmod, datwc, weights = basew,
              control = glm.control(maxit=10000))
fra1 <- coeftest(fra, vcovCL(fra, cluster = ~ crty))
frase <- fra1[, 2]
frapval <- fra1[, 4]

fram <- glmmTMB(fulmodm, datwc, weights = basew, family = nbinom2)

### Germany

datwc <- findat %>% filter( crty != "GER")
k <- length(unique(datwc$crty))
datwc <- datwc %>% mutate(total = n(), ncrty = k) %>%
  group_by(crty) %>% mutate(count = n()) %>% ungroup() %>%
  mutate(basew = 1/(count/total)) %>% dplyr::select(-total,-ncrty,-count)

ger <- glm.nb(fulmod, datwc, weights = basew,
              control = glm.control(maxit=10000))
ger1 <- coeftest(ger, vcovCL(ger, cluster = ~ crty))
gerse <- ger1[, 2]
gerpval <- ger1[, 4]

germ <- glmmTMB(fulmodm, datwc, weights = basew, family = nbinom2)

### Norway

datwc <- findat %>% filter( crty != "NOR")
k <- length(unique(datwc$crty))
datwc <- datwc %>% mutate(total = n(), ncrty = k) %>%
  group_by(crty) %>% mutate(count = n()) %>% ungroup() %>%
  mutate(basew = 1/(count/total)) %>% dplyr::select(-total,-ncrty,-count)

nor <- glm.nb(fulmod, datwc, weights = basew,
              control = glm.control(maxit=10000))
nor1 <- coeftest(nor, vcovCL(nor, cluster = ~ crty))
norse <- nor1[, 2]
norpval <- nor1[, 4]

norm <- glmmTMB(fulmodm, datwc, weights = basew, family = nbinom2)

texreg(list(bel,belm,ch,chm,dnk,dnkm,fra,fram,ger,germ,nor,norm), 
       override.pvalues = list(belpval,NA,chpval,NA,dnkpval,NA,frapval,NA,gerpval,NA,norpval,NA),
       override.se = list(belse,NA,chse,NA,dnkse,NA,frase,NA,gerse,NA,norse,NA), sideways = T,
       custom.coef.map = list("vot_sh" = "Vote Share","spmax" = "Max. Vote Share Neigh. Districts",
                              "vot_sh:spmax" = "Vote Share X Max. Vote Share Neigh. Districts", 
                              "mrdis" = "Pre-Reform District Magnitude", "soc" = "Socialist Party",
                              "(Intercept)" = "Intercept"), omit.coef = "(crty)",
       caption = "Robustness: Negative Binomial Dropping one Country", label = "estwc", scalebox = 0.7,
       booktabs = T, dcolumn = T, use.packages = F, 
       no.margin = T,custom.header = list("Without Belgium" = 1:2,
                                          "Without Switzerland" = 3:4,
                                          "Without Denmark" = 5:6,
                                          "Without France" = 7:8,
                                          "Without Germany" = 9:10,
                                          "Without Norway" = 11:12),
       custom.gof.rows = list(
         "Country FE" = c(rep(c("Yes","No"),6)),
         "Party FE" = c(rep(c("Yes","No"),6)),
         "Country RE" = c(rep(c("No","Yes"),6)),
         "Party RE" = c(rep(c("No","Yes"),6)),
         "Cluster-Robust Std. Err. by Country" = c(rep(c("Yes","No"),6))))


### Table A3: Without weights

fulmod <- as.formula(pr_dis_mag~vot_sh*spmax+mrdis+soc+crty)

fulmod2 <- as.formula(pr_dis_mag~vot_sh*spmax+mrdis+soc+crty+Party)

fulmod3 <- as.formula(pr_dis_mag~vot_sh*spmax+mrdis+soc+(1|crty)+(1|Party))

fulmod4 <- as.formula(log(pr_dis_mag)~vot_sh*spmax+mrdis+soc+crty)

fulmod5 <- as.formula(log(pr_dis_mag)~vot_sh*spmax+mrdis+soc+crty+Party)

fulmod6 <- as.formula(log(pr_dis_mag)~vot_sh*spmax+mrdis+soc+(1|crty)+(1|Party))


m1 <- glm.nb(fulmod, findat,
             control = glm.control(maxit=10000))
ct1 <- coeftest(m1, vcovCL(m1, cluster = ~ crty))
m1se <- ct1[, 2]
m1pval <- ct1[, 4]

m2 <- glm.nb(fulmod2, findat, 
             control = glm.control(maxit=10000))
ct2 <- coeftest(m2, vcovCL(m2, cluster = ~ crty))
m2se <- ct2[, 2]
m2pval <- ct2[, 4]

m3 <- glmmTMB(fulmod3, findat, family = nbinom2)

m4 <- lm(fulmod4, findat)
ct4 <- coeftest(m4, vcovCL(m4, cluster = ~ crty))
m4se <- ct4[, 2]
m4pval <- ct4[, 4]

m5 <- lm(fulmod5, findat)
ct5 <- coeftest(m5, vcovCL(m5, cluster = ~ crty))
m5se <- ct5[, 2]
m5pval <- ct5[, 4]

m6 <- glmmTMB(fulmod6, findat, family = gaussian())

texreg(list(m1,m2,m3,m4,m5,m6), override.se = list(m1se,m2se,NA,m4se,m5se,NA), 
       override.pvalues = list(m1pval,m2pval,NA,m4pval,m5pval,NA), sideways = T,
       custom.coef.map = list("vot_sh" = "Vote Share","spmax" = "Max. Vote Share Neigh. Districts",
                              "vot_sh:spmax" = "Vote Share X Max. Vote Share Neigh. Districts", 
                              "mrdis" = "Pre-Reform District Magnitude", "soc" = "Socialist Party",
                              "(Intercept)" = "Intercept"), omit.coef = "(crty)",
       caption = "Electoral Geography \\& PR District Magnitude (Without Weights)", label = "est1wow", #sideways = T,
       booktabs = T, dcolumn = T, use.packages = F,
       no.margin = T,custom.header = list("Neg. Binomial" = 1:3, "OLS" = 4:6),
       custom.gof.rows = list(
         "Country FE" = c(rep("Yes",2),"No",rep("Yes",2),"No"),
         "Party FE" = c(rep(c("No","Yes","No"),2)),
         "Country RE" = c(rep("No",2),"Yes",rep("No",2),"Yes"),
         "Party RE" = c(rep(c("No","No","Yes"),2)),
         "Cluster-Robust Std. Err. by Country" = c(rep("Yes",2),"No",rep("Yes",2),"No")))



